
********* This program generates the network datasets that go into the R code to construct population-wide networks *********
*** NOTE: this is part 1 of the program that generates the network data for analysis. Run the R code after this part and before running part 2 of the program.

* Session setup: empty the session and switch off output paging permanently
clear all
set more off, permanently

* Work inside the Data folder under $localdir
* ($localdir is not defined in this file; it is expected to be set before this do-file runs)
cd "${localdir}\Data"

* Path of the Output folder, stored in $output
global output "${localdir}\Output"


***************************************************** NETWORK CONSTRUCTION ******************************************************

* Construct networks from 2008-2009 for survey 2010, networks from 2009-2010 for survey 2011, etc. 


***** Family links: Partners and parents *****

* Build one running identifier ("unique") for every person number that appears
* anywhere in the register data, whether as pnr, partner_pnr, mom_pnr or dad_pnr.
* Four copies of the lookup are saved, one per key name, so that each can be
* merged onto the corresponding *_pnr variable later on.

use registerdata19802012_us.dta, clear
keep *pnr* year
save pnr_1980_2012.dta, replace

* The year dimension is irrelevant for the list of IDs; dedup early to shrink the stack
drop year
duplicates drop

* Pile the four ID columns on top of each other into a single column
stack pnr partner_pnr mom_pnr dad_pnr, into(new) clear
keep new
rename new pnr
duplicates drop
* missing() also removes extended missing values (.a-.z), which pnr==. would keep
drop if missing(pnr)
sort pnr
* long (not the default float) keeps the counter exact beyond 16,777,216 rows
generate long unique = _n
save "Unique ID (pnr).dta", replace

* Re-label the same lookup for partner, mom and dad.
* The current variable names are tracked explicitly, so the renames do not depend
* on Stata's variable-abbreviation setting (varabbrev).
local id_now  pnr
local key_now unique
foreach rel in partner mom dad {
    rename `id_now'  `rel'_pnr
    rename `key_now' unique_`rel'
    save "Unique ID (`rel').dta", replace
    local id_now  `rel'_pnr
    local key_now unique_`rel'
}

* Year-specific networks
* For family, the network is taken from the year before the survey. For firms and education, we include links from the two years prior to the survey
* Note: check that mom, dad, and partner are actually still alive (still in population data)

* For each survey year i, list everyone present in the register in year i-1.
* The same list is saved four times under different key names (pnr, partner_pnr,
* mom_pnr, dad_pnr) so it can later be merged onto each of those variables.
forvalues i = 2010/2013 {
    use registerdata19802012_us.dta, clear
    keep pnr year

    * j holds the register year that belongs to survey year i
    generate j = `i' - 1
    drop if year != j
    save "Pnr `i'.dta", replace

    * Rename the key step by step: pnr -> partner_pnr -> mom_pnr -> dad_pnr
    local key_now pnr
    foreach Rel in Partner Mom Dad {
        local key_next = lower("`Rel'") + "_pnr"
        rename `key_now' `key_next'
        save "`Rel' `i'.dta", replace
        local key_now `key_next'
    }
}

* Family network for each survey year i, taken from register year i-1.
* Output per year:
*   "All Unique ID i.dta"  - person numbers together with the unique identifiers
*   "Only Unique ID i.dta" - the unique identifiers only
* A partner/mom/dad link is kept only if that person is in the register in year
* i-1 (checked against the "Partner/Mom/Dad i.dta" lists).
forvalues i = 2010/2013 {
    use registerdata19802012_us.dta, clear
    keep *pnr* year

    * Only parents and partner links the year before the survey
    generate j = `i' - 1
    keep if year == j
    duplicates drop

    * Attach the person's own unique identifier
    merge m:1 pnr using "Unique ID (pnr).dta"
    drop if _merge == 2 // Should have no _merge==1
    drop _merge

    * Same two-step treatment for each relative, in the order partner, mom, dad
    foreach Rel in Partner Mom Dad {
        local rel = lower("`Rel'")

        * Step 1: attach the relative's unique identifier
        merge m:1 `rel'_pnr using "Unique ID (`rel').dta"
        drop if _merge == 2
        drop _merge

        * Step 2: make sure the relative is still alive (in the year i-1 population)
        merge m:1 `rel'_pnr using "`Rel' `i'.dta"
        drop if _merge == 2
        replace `rel'_pnr = . if _merge != 3
        * Blank the unique identifier too: the "Only Unique ID" file keeps just
        * unique*, so leaving it filled would retain the link despite the check
        replace unique_`rel' = . if _merge != 3
        drop _merge
    }

    * Pnr and unique
    save "All Unique ID `i'.dta", replace

    * Only unique
    keep unique*
    save "Only Unique ID `i'.dta", replace
}


***** Education links: high school graduate cohorts and university degree-subject cohorts *****

* Education network for each survey year i: person-to-cohort pairs (unique, educ_id)
* built from the records of years i-2 and i-1.
forvalues i = 2010/2013 {
    local t1 = `i' - 1
    local t2 = `i' - 2

    use educid_20082012.dta, clear

    * Only education links from the two years prior to the survey
    keep if inrange(year, `t2', `t1')

    * Merge in the unique identifier (to replace pnr)
    merge m:1 pnr using "Unique ID (pnr).dta"
    drop if _merge == 2
    drop _merge

    * Keep people observed in year t-1 (but keep their links from both t-1 and t-2)
    tempvar seen_t1
    bysort pnr: egen `seen_t1' = max(year == `t1')
    keep if `seen_t1' == 1

    * One row per person-cohort pair
    keep unique educ_id
    order unique educ_id
    duplicates drop

    save "Unique Education-cohort `i'.dta", replace
}


***** Workplace links: Coworkers 2008-2012 *****

* Workplace network for each survey year i: person-to-workplace-group pairs
* (unique, unique_firm_year) built from the firm records of years i-2 and i-1.
* The group is the firm-year by default, and the firm-year-education(1-digit)
* cell when workers > 25 or workers_total >= 50.
forvalues i = 2010/2013 {
    local t1 = `i' - 1
    local t2 = `i' - 2

    use firmid_20082012.dta, clear

    * Only workplace links from the two years prior to the survey
    keep if inrange(year, `t2', `t1')

    * Merge in the unique identifier (to replace pnr)
    merge m:1 pnr using "Unique ID (pnr).dta"
    drop if _merge == 2
    drop _merge

    * Merge in education codes
    merge m:1 pnr year using occupation_education_20082012.dta
    drop if _merge == 2
    drop _merge

    keep firm year pnr workers workers_total unique educcode educ1 educ2 educ3

    * Missing education info gets its own category
    foreach v of varlist educ1 educ2 educ3 {
        replace `v' = 999 if `v' == .
    }

    * Link 1: unique firm-year identifier
    egen link1 = group(year firm)

    * Link 2: unique firm-year-education 1-digit identifier,
    * shifted past the largest link1 value so the two ID ranges never collide
    egen link2 = group(year firm educ1)
    summarize link1, meanonly
    replace link2 = link2 + r(max)

    * Final link - control for firm size and total no of coworkers over time.
    * long keeps large group IDs exact (the default float is exact only up to 16,777,216).
    generate long unique_firm_year = link1
    * Missing values compare as larger than any number in Stata, so both size
    * conditions exclude missing explicitly; otherwise a missing size would be
    * treated as a large firm
    replace unique_firm_year = link2 if workers > 25 & !missing(workers)
    replace unique_firm_year = link2 if workers_total >= 50 & !missing(workers_total)

    * Diagnostic only: distribution of group sizes
    duplicates tag unique_firm_year, generate(dup)
    summarize dup, detail

    * Keep people observed in year t-1 (but keep their links from both t-1 and t-2)
    tempvar seen_t1
    bysort pnr: egen `seen_t1' = max(year == `t1')
    keep if `seen_t1' == 1

    * One row per person-group pair
    keep unique unique_firm_year
    duplicates drop

    save "Unique Firm-year `i'.dta", replace
}



***** Political survey participants *****
* We only want to focus on these because we have political outcomes (so we create this dataset of all surveyed individuals to extract only the relevant rows from the connectivity matrix)

* Extract the person numbers from each survey wave (2010-2014) into S_<year>.dta
forvalues wave = 2010/2014 {
    use survey`wave'_us.dta, clear
    keep pnr
    save "S_`wave'.dta", replace
}

* Pool all waves into one list of surveyed individuals
use "S_2010.dta", clear
forvalues wave = 2011/2014 {
    append using "S_`wave'.dta"
}

* One row per person, with pnr converted to numeric for the merge
duplicates drop
destring pnr, replace

* Swap pnr for the unique identifier; people without a match in the ID list are dropped
merge 1:1 pnr using "Unique ID (pnr).dta"
drop if _merge != 3
drop pnr _merge

save "Survey Members.dta", replace




*** MOVE TO R NOW - run the code to create the matrices, then run part 2 of this program 















